library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)
0 1 442 446
dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL
trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]
pander::pander(table(dataColonTrain$status))
| 0 | 1 |
|---|---|
| 318 | 303 |
pander::pander(table(dataColonTest$status))
| 0 | 1 |
|---|---|
| 124 | 143 |
ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)
[++-++-++-+++-++-+++–+-+-+++-+++-+++-]..
sm <- summary(ml)
pander::pander(sm$coefficients)
| Estimate | lower | HR | upper | u.Accuracy | |
|---|---|---|---|---|---|
| extent | 4.12e-01 | 1.255 | 1.510 | 1.816 | 0.554 |
| node4 | 3.97e-01 | 1.228 | 1.488 | 1.803 | 0.591 |
| rxLev_5FU_age | -5.32e-03 | 0.992 | 0.995 | 0.997 | 0.567 |
| age_nodes | 3.98e-05 | 1.000 | 1.000 | 1.000 | 0.589 |
| nodes | 2.62e-02 | 1.012 | 1.027 | 1.042 | 0.599 |
| rxLev_5FU | -1.24e-01 | 0.825 | 0.884 | 0.947 | 0.567 |
| age_extent | 1.06e-03 | 1.000 | 1.001 | 1.002 | 0.538 |
| age_node4 | 1.10e-03 | 1.000 | 1.001 | 1.002 | 0.591 |
| adhere | 2.54e-02 | 1.005 | 1.026 | 1.047 | 0.544 |
| surg | 1.74e-02 | 1.003 | 1.018 | 1.032 | 0.549 |
| r.Accuracy | full.Accuracy | u.AUC | r.AUC | full.AUC | |
|---|---|---|---|---|---|
| extent | 0.594 | 0.624 | 0.562 | 0.589 | 0.627 |
| node4 | 0.607 | 0.624 | 0.585 | 0.609 | 0.627 |
| rxLev_5FU_age | 0.595 | 0.624 | 0.571 | 0.589 | 0.627 |
| age_nodes | 0.565 | 0.609 | 0.586 | 0.567 | 0.607 |
| nodes | 0.606 | 0.622 | 0.596 | 0.606 | 0.622 |
| rxLev_5FU | 0.587 | 0.619 | 0.571 | 0.585 | 0.619 |
| age_extent | 0.608 | 0.622 | 0.539 | 0.607 | 0.622 |
| age_node4 | 0.621 | 0.615 | 0.585 | 0.621 | 0.614 |
| adhere | 0.597 | 0.612 | 0.536 | 0.595 | 0.610 |
| surg | 0.593 | 0.614 | 0.544 | 0.590 | 0.612 |
| IDI | NRI | z.IDI | z.NRI | Delta.AUC | Frequency | |
|---|---|---|---|---|---|---|
| extent | 0.02541 | 0.248 | 4.38 | 4.43 | 0.03844 | 1.0 |
| node4 | 0.02196 | 0.334 | 4.11 | 4.83 | 0.01885 | 1.0 |
| rxLev_5FU_age | 0.01880 | 0.283 | 3.99 | 3.79 | 0.03863 | 1.0 |
| age_nodes | 0.01633 | 0.268 | 3.91 | 3.49 | 0.04011 | 1.0 |
| nodes | 0.01221 | 0.222 | 3.60 | 2.88 | 0.01622 | 1.0 |
| rxLev_5FU | 0.01524 | 0.283 | 3.54 | 3.79 | 0.03390 | 0.9 |
| age_extent | 0.01211 | 0.166 | 3.07 | 2.08 | 0.01429 | 0.5 |
| age_node4 | 0.00711 | 0.279 | 2.86 | 4.09 | -0.00656 | 0.9 |
| adhere | 0.00617 | 0.144 | 2.51 | 2.54 | 0.01454 | 0.4 |
| surg | 0.00602 | 0.174 | 2.40 | 2.45 | 0.02176 | 0.3 |
Here we evaluate the model using the RRPlot() function.
Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years
index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)
h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))
rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
timetoEvent=dataColonTrain$time,
title="Raw Train: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
| @:0.9 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|
| Thr | 0.520 | 0.369 | 0.241 | 0.1148 | 0.499 |
| RR | 1.511 | 1.827 | 4.524 | 19.6434 | 1.522 |
| SEN | 0.231 | 0.587 | 0.980 | 1.0000 | 0.251 |
| SPE | 0.896 | 0.704 | 0.145 | 0.0126 | 0.887 |
| BACC | 0.564 | 0.646 | 0.562 | 0.5063 | 0.569 |
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 1.01 | 0.899 | 1.13 | 0.862 |
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.51 | 1.51 | 1.49 | 1.53 |
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.32 | 1.32 | 1.32 | 1.33 |
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.667 | 0.666 | 0.636 | 0.697 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.689 | 0.648 | 0.73 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.231 | 0.185 | 0.283 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.899 | 0.861 | 0.93 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 | at_0.5 |
|---|---|---|---|---|
| 0.521 | 0.369 | 0.241 | 0.115 | 0.5 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.52 | 1.29 | 1.79 |
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 519 | 233 | 267.9 | 4.54 | 39.5 |
| class=1 | 102 | 70 | 35.1 | 34.64 | 39.5 |
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
| h0 | Gain | DeltaTime |
|---|---|---|
| 0.648 | 1.5 | 3.2 |
h0 <- calprob$h0
timeinterval <- calprob$timeInterval;
rdata <- cbind(dataColonTrain$status,calprob$prob)
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
timetoEvent=dataColonTrain$time,
title="Calibrated Train: Colon",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
| @:0.9 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|
| Thr | 0.666 | 0.497 | 0.338 | 0.1667 | 0.500 |
| RR | 1.511 | 1.827 | 4.524 | 19.6434 | 1.739 |
| SEN | 0.231 | 0.587 | 0.980 | 1.0000 | 0.551 |
| SPE | 0.896 | 0.704 | 0.145 | 0.0126 | 0.717 |
| BACC | 0.564 | 0.646 | 0.562 | 0.5063 | 0.634 |
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 0.781 | 0.695 | 0.874 | 8.78e-06 |
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 0.987 | 0.987 | 0.975 | 1 |
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.02 | 1.02 | 1.02 | 1.02 |
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.667 | 0.667 | 0.637 | 0.696 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.689 | 0.648 | 0.73 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.231 | 0.185 | 0.283 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.899 | 0.861 | 0.93 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 | at_0.5 |
|---|---|---|---|---|
| 0.667 | 0.497 | 0.338 | 0.167 | 0.5 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.52 | 1.29 | 1.79 |
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 519 | 233 | 267.9 | 4.54 | 39.5 |
| class=1 | 102 | 70 | 35.1 | 34.64 | 39.5 |
The calibrated h0 and timeinterval were estimated on the training set
index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))
rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
timetoEvent=dataColonTest$time,
title="Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
| @:0.667424600803063 | @:0.497313110373278 | @:0.337641790153328 | |
|---|---|---|---|
| Thr | 0.666 | 0.498 | 0.3383 |
| RR | 1.795 | 1.541 | 1.4555 |
| SEN | 0.308 | 0.615 | 0.9580 |
| SPE | 0.927 | 0.613 | 0.0806 |
| BACC | 0.618 | 0.614 | 0.5193 |
| @:0.16666168331968 | @:0.5 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|---|
| Thr | 0.15230 | 0.500 | 0.512 | 0.345 | 0.15230 | 0.500 |
| RR | 1.00000 | 1.546 | 1.791 | 2.320 | 1.00000 | 1.546 |
| SEN | 1.00000 | 0.587 | 0.517 | 0.937 | 1.00000 | 0.587 |
| SPE | 0.00806 | 0.645 | 0.790 | 0.218 | 0.00806 | 0.645 |
| BACC | 0.50403 | 0.616 | 0.654 | 0.577 | 0.50403 | 0.616 |
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 0.954 | 0.804 | 1.12 | 0.624 |
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.31 | 1.31 | 1.27 | 1.34 |
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.1 | 1.1 | 1.09 | 1.1 |
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.649 | 0.649 | 0.604 | 0.694 |
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.698 | 0.636 | 0.76 |
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.308 | 0.233 | 0.39 |
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.935 | 0.877 | 0.972 |
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 | at_0.5 | at_max_BACC | at_max_RR |
|---|---|---|---|---|---|---|
| 0.667 | 0.497 | 0.338 | 0.167 | 0.5 | 0.512 | 0.345 |
| atSPE100 | at_0.5 |
|---|---|
| 0.152 | 0.5 |
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.82 | 1.51 | 2.19 |
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 131 | 55 | 80.1 | 7.89156 | 18.0707 |
| class=1 | 84 | 44 | 43.4 | 0.00935 | 0.0134 |
| class=2 | 52 | 44 | 19.5 | 30.83498 | 36.1983 |
Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process
rcv <- randomCV(theData=dataColonTrain,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.75,
repetitions=50,
classSamplingType = "Pro",
testingSet=dataColonTest
)
.[+++–].[++].[++++-].[+++++].[++++++–].[+++++].[++++-].[++++++++].[++-].[+++–]10 Tested: 851 Avg. Selected: 8.1 Min Tests: 1 Max Tests: 10 Mean Tests: 4.970623 . MAD: 0.4731277
.[++++++].[++++++++].[++++-].[++—].[+-].[++++++].[+++++++].[++++-].[+++++++-].[+++++++++]20 Tested: 886 Avg. Selected: 8.85 Min Tests: 1 Max Tests: 20 Mean Tests: 9.548533 . MAD: 0.473931
.[+++].[++++++].[++++-].[++++-].[++].[+++].[++++-].[+++-].[++++-].[++-+]30 Tested: 888 Avg. Selected: 8.9 Min Tests: 2 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4730999
.[++++++].[+++-].[++++-].[++++–].[+++++++].[++++++].[++++–].[++++++].[++++-].[++++-]40 Tested: 888 Avg. Selected: 9 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4727452
.[++—].[+++++].[++++++].[++++-].[++++-].[++++++-].[+++++–].[+-+–].[++++++++].[++++–]50 Tested: 888 Avg. Selected: 8.88 Min Tests: 5 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4729526
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
timetoEvent=times,
title="CV Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
| @:0.667424600803063 | @:0.497313110373278 | @:0.337641790153328 | |
|---|---|---|---|
| Thr | 0.668 | 0.497 | 0.3457 |
| RR | 1.649 | 1.624 | 3.5498 |
| SEN | 0.168 | 0.536 | 0.9865 |
| SPE | 0.950 | 0.706 | 0.0792 |
| BACC | 0.559 | 0.621 | 0.5329 |
| @:0.16666168331968 | @:0.5 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|---|
| Thr | 0.21300 | 0.500 | 0.490 | 0.361 | 0.21300 | 0.500 |
| RR | 1.00000 | 1.596 | 1.738 | 2.276 | 1.00000 | 1.596 |
| SEN | 1.00000 | 0.502 | 0.592 | 0.969 | 1.00000 | 0.502 |
| SPE | 0.00452 | 0.729 | 0.683 | 0.106 | 0.00452 | 0.729 |
| BACC | 0.50226 | 0.615 | 0.638 | 0.537 | 0.50226 | 0.615 |
pander::pander(t(rrAnalysisCVTest$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 0.8 | 0.728 | 0.878 | 1.21e-06 |
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.07 | 1.07 | 1.06 | 1.08 |
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.04 | 1.04 | 1.04 | 1.04 |
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.65 | 0.65 | 0.624 | 0.674 |
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.672 | 0.637 | 0.707 |
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.168 | 0.135 | 0.206 |
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.95 | 0.926 | 0.969 |
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 | at_0.5 | at_max_BACC | at_max_RR |
|---|---|---|---|---|---|---|
| 0.667 | 0.497 | 0.338 | 0.167 | 0.5 | 0.49 | 0.361 |
| atSPE100 | at_0.5 |
|---|---|
| 0.213 | 0.5 |
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.65 | 1.45 | 1.88 |
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 520 | 208 | 295.5 | 25.9 | 77.3 |
| class=1 | 271 | 163 | 117.9 | 17.3 | 23.6 |
| class=2 | 97 | 75 | 32.7 | 54.8 | 59.6 |